# encoding: utf-8
"""
@author: 夏洛
@QQ: 1972386194
@file: 去哪儿还原.py
"""


# Sample of the obfuscated markup this script decodes (every element carries
# title="619").
# The first <b> is 48px wide and wraps three 16px-wide <i> cells whose text is
# 1, 6, 9. The two <b> elements after it each hold a single digit ('6' and
# '1') and are shifted left by 48px and 32px respectively.
html = '''
<em class="rel">
  <b style="width:48px;left:-48px">
    <i title="619" style="width: 16px;">1</i>
    <i title="619" style="width: 16px;">6</i>
    <i title="619" style="width: 16px;">9</i>
  </b>
  <b title="619" style="width: 16px;left:-48px">6</b>
  <b title="619" style="width: 16px;left:-32px">1</b>
</em>
'''


from lxml import etree

# Parse the sample markup into an lxml element tree.
htmls = etree.HTML(html)

# Every <b> that is a direct child of the <em class="rel"> wrapper.
tag = htmls.xpath('//em[@class="rel"]/b')

# Base digits: the first text node of each <i> nested inside those <b> nodes.
ss = [
    digit_node.xpath('./text()')[0]
    for digit_node in htmls.xpath('//em[@class="rel"]/b/i')
]
print(ss)

# Extract the offsets: pair the text of every <b> after the first one with its
# inline style attribute (the first <b> is skipped via tag[1:]).
styles = [(node.text, node.xpath('@style')[0]) for node in tag[1:]]
# [('6', 'width: 16px;left:-48px'), ('1', 'width: 16px;left:-32px')]
import re

# Raw string so that \d reaches the regex engine as a digit class instead of
# being an invalid string escape; compiled once rather than per iteration.
offset_re = re.compile(r'width: 16px;left:-(\d+)px')
for p, t in styles:
    found = offset_re.search(t)
    if found is None:
        # Fail with the offending style instead of an AttributeError on None.
        raise ValueError('unexpected overlay style: %r' % (t,))
    left = found.group(1)
    # The pattern only accepts 16px-wide cells, so -left // 16 is the slot
    # counted from the right-hand end of ss (48px -> -3, 32px -> -2).
    ss[-int(left) // 16] = p
print(ss)

'''
CSS反爬虫  最重要的是找规律

'''
